//go:build !noasm && amd64
// AUTO-GENERATED BY GOAT -- DO NOT EDIT

// hamming_512 counts the positions at which two float32 arrays differ and
// stores that count, converted to float32, at *res.
//
// Arguments are copied from the Go frame into the registers that the
// byte-encoded body expects:
//   a+0(FP)    -> DI  first float32 array
//   b+8(FP)    -> SI  second float32 array
//   res+16(FP) -> DX  where the float32 result is written
//   len+24(FP) -> CX  pointer to the element count; it is loaded with a
//                     64-bit move but only the low 32 bits (R11D) are used
//
// Layout, with n = element count:
//   n <= 0       return 0 (hand-added guard, see NOTE below)
//   1 <= n <= 7  scalar loop LBB0_14
//   n >= 8       128 floats per pass with zmm registers (LBB0_28), then 32
//                per pass with ymm (LBB0_32), then 8 per pass (LBB0_22 and
//                its 4x unrolled form LBB0_49), then the scalar tail
//                LBB0_48; LBB0_40 folds the partial sums together.
//
// Compare masks are all-ones per differing lane, so "vpsubd mask, acc, acc"
// adds 1 to the lane counter for every mismatch.
//
// The zmm/ymm loops on the n >= 8 path use compare predicate 0x0c (NEQ_OQ),
// so a pair containing NaN is not counted there, while the scalar loops use
// 0x04 (NEQ_UQ), where such a pair is counted.
//
// The body uses zmm registers, k mask registers and vpmovm2d.
// NOTE(review): no CPU-feature check is visible in this file; presumably the
// caller only dispatches here when AVX-512 is available -- confirm.
//
// NOTE: the JAE after "cmpl $7, %r10d" used to target LBB0_3. With n <= 7
// already established, that branch is taken only when (uint32)(n-1) >= 7,
// i.e. n <= 0, and LBB0_3/LBB0_8 then use the wrapped value as a trip count
// (0x100000000 elements for n == 0), reading far beyond both inputs. It now
// targets LBB0_41 with EAX == 0, so *res becomes 0. LBB0_3, LBB0_8, LBB0_9,
// LBB0_5, LBB0_6 and LBB0_12 are no longer reachable and are kept only to
// stay close to the generator output. This is a manual change to generated
// code: regenerating the file drops it unless the source gets the same guard.
TEXT ·hamming_512(SB), $0-32
	MOVQ a+0(FP), DI
	MOVQ b+8(FP), SI
	MOVQ res+16(FP), DX
	MOVQ len+24(FP), CX
	// Prologue: save RBP/R14/RBX (restored at LBB0_41) and align RSP.
	BYTE $0x55                     // pushq	%rbp
	WORD $0x8948; BYTE $0xe5       // movq	%rsp, %rbp
	WORD $0x5641                   // pushq	%r14
	BYTE $0x53                     // pushq	%rbx
	LONG $0xf8e48348               // andq	$-8, %rsp
	// r11 = *len; n > 7 (signed) takes the vector path.
	WORD $0x8b4c; BYTE $0x19       // movq	(%rcx), %r11
	LONG $0x07fb8341               // cmpl	$7, %r11d
	JG   LBB0_15
	// n <= 7: r10d = n-1, eax = running count (0).
	LONG $0xffffb941; WORD $0xffff // movl	$4294967295, %r9d               # imm = 0xFFFFFFFF
	LONG $0x0b148d47               // leal	(%r11,%r9), %r10d
	WORD $0xc031                   // xorl	%eax, %eax
	LONG $0x07fa8341               // cmpl	$7, %r10d
	// Unsigned n-1 >= 7 here means n <= 0: store 0 and return (see NOTE).
	JAE  LBB0_41
	// 1 <= n <= 7: r9 = b, rbx = a, then the scalar loop.
	WORD $0x8949; BYTE $0xf1       // movq	%rsi, %r9
	WORD $0x8948; BYTE $0xfb       // movq	%rdi, %rbx
	JMP  LBB0_13

// n >= 8. ymm0 is zeroed for use as an accumulator; fewer than 128 elements
// skips the zmm loop.
LBB0_15:
	LONG $0xc0eff9c5                           // vpxor	%xmm0, %xmm0, %xmm0
	LONG $0x80fb8141; WORD $0x0000; BYTE $0x00 // cmpl	$128, %r11d
	JB   LBB0_16
	LONG $0xc9eff1c5                           // vpxor	%xmm1, %xmm1, %xmm1
	LONG $0xd2efe9c5                           // vpxor	%xmm2, %xmm2, %xmm2
	LONG $0xdbefe1c5                           // vpxor	%xmm3, %xmm3, %xmm3
	LONG $0xe4efd9c5                           // vpxor	%xmm4, %xmm4, %xmm4
	LONG $0xedefd1c5                           // vpxor	%xmm5, %xmm5, %xmm5
	LONG $0xf6efc9c5                           // vpxor	%xmm6, %xmm6, %xmm6
	LONG $0xffefc1c5                           // vpxor	%xmm7, %xmm7, %xmm7

// 128 floats per pass: eight 64-byte loads from a (rdi) compared against b
// (rsi) into k0..k7, each mask widened with vpmovm2d and subtracted from the
// counters zmm1..zmm7 (k0 and k7 both feed zmm1).
LBB0_28:
	LONG $0x487c7162; WORD $0x0710             // vmovups	(%rdi), %zmm8
	LONG $0x487c7162; WORD $0x4f10; BYTE $0x01 // vmovups	64(%rdi), %zmm9
	LONG $0x487c7162; WORD $0x5710; BYTE $0x02 // vmovups	128(%rdi), %zmm10
	LONG $0x487c7162; WORD $0x5f10; BYTE $0x03 // vmovups	192(%rdi), %zmm11
	LONG $0x487c7162; WORD $0x6710; BYTE $0x04 // vmovups	256(%rdi), %zmm12
	LONG $0x487c7162; WORD $0x6f10; BYTE $0x05 // vmovups	320(%rdi), %zmm13
	LONG $0x487c7162; WORD $0x7710; BYTE $0x06 // vmovups	384(%rdi), %zmm14
	LONG $0x483cf162; WORD $0x06c2; BYTE $0x0c // vcmpneq_oqps	(%rsi), %zmm8, %k0
	QUAD $0x0c014ec24834f162                   // vcmpneq_oqps	64(%rsi), %zmm9, %k1
	QUAD $0x0c0256c2482cf162                   // vcmpneq_oqps	128(%rsi), %zmm10, %k2
	QUAD $0x0c035ec24824f162                   // vcmpneq_oqps	192(%rsi), %zmm11, %k3
	QUAD $0x0c0466c2481cf162                   // vcmpneq_oqps	256(%rsi), %zmm12, %k4
	QUAD $0x0c056ec24814f162                   // vcmpneq_oqps	320(%rsi), %zmm13, %k5
	QUAD $0x0c0676c2480cf162                   // vcmpneq_oqps	384(%rsi), %zmm14, %k6
	LONG $0x487c7162; WORD $0x4710; BYTE $0x07 // vmovups	448(%rdi), %zmm8
	QUAD $0x0c077ec2483cf162                   // vcmpneq_oqps	448(%rsi), %zmm8, %k7
	LONG $0x487e7262; WORD $0xc038             // vpmovm2d	%k0, %zmm8
	LONG $0x4875d162; WORD $0xc8fa             // vpsubd	%zmm8, %zmm1, %zmm1
	LONG $0x487e7262; WORD $0xc738             // vpmovm2d	%k7, %zmm8
	LONG $0x4875d162; WORD $0xc8fa             // vpsubd	%zmm8, %zmm1, %zmm1
	LONG $0x487e7262; WORD $0xc138             // vpmovm2d	%k1, %zmm8
	LONG $0x486dd162; WORD $0xd0fa             // vpsubd	%zmm8, %zmm2, %zmm2
	LONG $0x487e7262; WORD $0xc238             // vpmovm2d	%k2, %zmm8
	LONG $0x4865d162; WORD $0xd8fa             // vpsubd	%zmm8, %zmm3, %zmm3
	LONG $0x487e7262; WORD $0xc338             // vpmovm2d	%k3, %zmm8
	LONG $0x485dd162; WORD $0xe0fa             // vpsubd	%zmm8, %zmm4, %zmm4
	LONG $0x487e7262; WORD $0xc438             // vpmovm2d	%k4, %zmm8
	LONG $0x4855d162; WORD $0xe8fa             // vpsubd	%zmm8, %zmm5, %zmm5
	LONG $0x487e7262; WORD $0xc538             // vpmovm2d	%k5, %zmm8
	LONG $0x484dd162; WORD $0xf0fa             // vpsubd	%zmm8, %zmm6, %zmm6
	LONG $0x487e7262; WORD $0xc638             // vpmovm2d	%k6, %zmm8
	LONG $0x4845d162; WORD $0xf8fa             // vpsubd	%zmm8, %zmm7, %zmm7
	LONG $0x80c38341                           // addl	$-128, %r11d
	LONG $0x00c78148; WORD $0x0002; BYTE $0x00 // addq	$512, %rdi                      # imm = 0x200
	LONG $0x00c68148; WORD $0x0002; BYTE $0x00 // addq	$512, %rsi                      # imm = 0x200
	// The sign-extended low byte equals n only for -128 <= n <= 127, so the
	// loop repeats while at least 128 elements remain.
	LONG $0xc3be0f41                           // movsbl	%r11b, %eax
	WORD $0x3944; BYTE $0xd8                   // cmpl	%r11d, %eax
	JNE  LBB0_28
	// Fold zmm1..zmm7 into one vector, then its two 256-bit halves into ymm1.
	LONG $0x4865f162; WORD $0xd2fe             // vpaddd	%zmm2, %zmm3, %zmm2
	LONG $0x486df162; WORD $0xd4fe             // vpaddd	%zmm4, %zmm2, %zmm2
	LONG $0x486df162; WORD $0xd5fe             // vpaddd	%zmm5, %zmm2, %zmm2
	LONG $0x486df162; WORD $0xd6fe             // vpaddd	%zmm6, %zmm2, %zmm2
	LONG $0x486df162; WORD $0xd7fe             // vpaddd	%zmm7, %zmm2, %zmm2
	LONG $0x486df162; WORD $0xc9fe             // vpaddd	%zmm1, %zmm2, %zmm1
	LONG $0x48fdf362; WORD $0xca3b; BYTE $0x01 // vextracti64x4	$1, %zmm1, %ymm2
	LONG $0xc9feedc5                           // vpaddd	%ymm1, %ymm2, %ymm1
	// Nothing left: reduce ymm1 alone. Fewer than 32 left: skip the ymm loop.
	WORD $0x8545; BYTE $0xdb                   // testl	%r11d, %r11d
	JE   LBB0_30
	LONG $0x20fb8341                           // cmpl	$32, %r11d
	JB   LBB0_18

// At least 32 elements remain: clear the extra ymm counters (ymm1 keeps the
// total carried over from the zmm loop, or zero from LBB0_16).
LBB0_31:
	LONG $0xd2efe9c5 // vpxor	%xmm2, %xmm2, %xmm2
	LONG $0xdbefe1c5 // vpxor	%xmm3, %xmm3, %xmm3
	LONG $0xc0eff9c5 // vpxor	%xmm0, %xmm0, %xmm0

// 32 floats per pass with four ymm compares, counted in ymm1, ymm0, ymm3 and
// ymm2 respectively; repeats while more than 31 elements remain.
LBB0_32:
	LONG $0x2610fcc5               // vmovups	(%rsi), %ymm4
	LONG $0x6e10fcc5; BYTE $0x20   // vmovups	32(%rsi), %ymm5
	LONG $0x7610fcc5; BYTE $0x40   // vmovups	64(%rsi), %ymm6
	LONG $0x7e10fcc5; BYTE $0x60   // vmovups	96(%rsi), %ymm7
	LONG $0x27c2dcc5; BYTE $0x0c   // vcmpneq_oqps	(%rdi), %ymm4, %ymm4
	LONG $0x6fc2d4c5; WORD $0x0c20 // vcmpneq_oqps	32(%rdi), %ymm5, %ymm5
	LONG $0xccfaf5c5               // vpsubd	%ymm4, %ymm1, %ymm1
	LONG $0xc5fafdc5               // vpsubd	%ymm5, %ymm0, %ymm0
	LONG $0x67c2ccc5; WORD $0x0c40 // vcmpneq_oqps	64(%rdi), %ymm6, %ymm4
	LONG $0xdcfae5c5               // vpsubd	%ymm4, %ymm3, %ymm3
	LONG $0x67c2c4c5; WORD $0x0c60 // vcmpneq_oqps	96(%rdi), %ymm7, %ymm4
	LONG $0xd4faedc5               // vpsubd	%ymm4, %ymm2, %ymm2
	LONG $0xe0c38341               // addl	$-32, %r11d
	LONG $0x80ef8348               // subq	$-128, %rdi
	LONG $0x80ee8348               // subq	$-128, %rsi
	LONG $0x1ffb8341               // cmpl	$31, %r11d
	JA   LBB0_32
	LONG $0x08fb8341               // cmpl	$8, %r11d
	JAE  LBB0_20
	JMP  LBB0_25

// Unreachable since the n <= 0 guard was added (see NOTE in the header).
// r10d holds n-1; r8 is the mask that rounds a count down to a multiple of 64.
LBB0_3:
	QUAD $0x0001ffffffc0b849; WORD $0x0000 // movabsq	$8589934528, %r8                # imm = 0x1FFFFFFC0
	LONG $0x3ffa8341                       // cmpl	$63, %r10d
	JAE  LBB0_8
	WORD $0xc031                           // xorl	%eax, %eax
	WORD $0xc931                           // xorl	%ecx, %ecx
	JMP  LBB0_5

// 8 <= n < 128: clear ymm1 and pick the 32-wide or 8-wide loop.
LBB0_16:
	LONG $0xc9eff1c5 // vpxor	%xmm1, %xmm1, %xmm1
	LONG $0x20fb8341 // cmpl	$32, %r11d
	JAE  LBB0_31

// Fewer than 32 elements remain and the ymm loop was skipped: clear the
// counters it would have used.
LBB0_18:
	LONG $0xdbefe1c5 // vpxor	%xmm3, %xmm3, %xmm3
	LONG $0xd2efe9c5 // vpxor	%xmm2, %xmm2, %xmm2
	LONG $0x08fb8341 // cmpl	$8, %r11d
	JB   LBB0_25

// 8-wide loop setup. eax = remaining-8, and (eax>>3)+1 is the number of
// 8-float passes. If that is not a multiple of 4, LBB0_22 first runs
// (passes mod 4) single passes so that LBB0_49 can run 4 passes at a time.
LBB0_20:
	LONG $0xf8438d41         // leal	-8(%r11), %eax
	WORD $0xc189             // movl	%eax, %ecx
	WORD $0xe9c1; BYTE $0x03 // shrl	$3, %ecx
	WORD $0x598d; BYTE $0x01 // leal	1(%rcx), %ebx
	WORD $0xc3f6; BYTE $0x03 // testb	$3, %bl
	JE   LBB0_24
	WORD $0xc180; BYTE $0x01 // addb	$1, %cl
	WORD $0xb60f; BYTE $0xc9 // movzbl	%cl, %ecx
	WORD $0xe183; BYTE $0x03 // andl	$3, %ecx
	LONG $0x03e1c148         // shlq	$3, %rcx
	WORD $0xdb31             // xorl	%ebx, %ebx

// One 8-float pass per iteration into ymm1; rbx counts elements done until it
// reaches ecx, then that many are subtracted from the remaining count.
LBB0_22:
	LONG $0x2610fcc5             // vmovups	(%rsi), %ymm4
	LONG $0x27c2dcc5; BYTE $0x0c // vcmpneq_oqps	(%rdi), %ymm4, %ymm4
	LONG $0xccfaf5c5             // vpsubd	%ymm4, %ymm1, %ymm1
	LONG $0x20c78348             // addq	$32, %rdi
	LONG $0x20c68348             // addq	$32, %rsi
	LONG $0x08c38348             // addq	$8, %rbx
	WORD $0xd939                 // cmpl	%ebx, %ecx
	JNE  LBB0_22
	WORD $0x2941; BYTE $0xdb     // subl	%ebx, %r11d

// eax < 24 means there were fewer than 4 passes in total, all already done.
LBB0_24:
	WORD $0xf883; BYTE $0x18 // cmpl	$24, %eax
	JB   LBB0_25

// Four 8-float passes per iteration, all counted in ymm1; repeats while more
// than 7 elements remain.
LBB0_49:
	LONG $0x2610fcc5               // vmovups	(%rsi), %ymm4
	LONG $0x6e10fcc5; BYTE $0x20   // vmovups	32(%rsi), %ymm5
	LONG $0x7610fcc5; BYTE $0x40   // vmovups	64(%rsi), %ymm6
	LONG $0x7e10fcc5; BYTE $0x60   // vmovups	96(%rsi), %ymm7
	LONG $0x27c2dcc5; BYTE $0x0c   // vcmpneq_oqps	(%rdi), %ymm4, %ymm4
	LONG $0x6fc2d4c5; WORD $0x0c20 // vcmpneq_oqps	32(%rdi), %ymm5, %ymm5
	LONG $0xccfaf5c5               // vpsubd	%ymm4, %ymm1, %ymm1
	LONG $0xcdfaf5c5               // vpsubd	%ymm5, %ymm1, %ymm1
	LONG $0x67c2ccc5; WORD $0x0c40 // vcmpneq_oqps	64(%rdi), %ymm6, %ymm4
	LONG $0xccfaf5c5               // vpsubd	%ymm4, %ymm1, %ymm1
	LONG $0x67c2c4c5; WORD $0x0c60 // vcmpneq_oqps	96(%rdi), %ymm7, %ymm4
	LONG $0xccfaf5c5               // vpsubd	%ymm4, %ymm1, %ymm1
	LONG $0xe0c38341               // addl	$-32, %r11d
	LONG $0x80ef8348               // subq	$-128, %rdi
	LONG $0x80ee8348               // subq	$-128, %rsi
	LONG $0x07fb8341               // cmpl	$7, %r11d
	JA   LBB0_49

// Scalar tail dispatch. ecx will hold the tail's mismatch count. With no
// elements left it is zero; with remaining-1 < 7 the plain scalar loop runs
// on r14 = a, rbx = b.
// NOTE(review): every jump into LBB0_25 appears to leave fewer than 8
// elements, so the JAE to LBB0_35 looks like it is never taken -- confirm.
LBB0_25:
	WORD $0x8545; BYTE $0xdb // testl	%r11d, %r11d
	JE   LBB0_26
	LONG $0xff438d45         // leal	-1(%r11), %r8d
	WORD $0xc931             // xorl	%ecx, %ecx
	LONG $0x07f88341         // cmpl	$7, %r8d
	JAE  LBB0_35
	WORD $0x8949; BYTE $0xfe // movq	%rdi, %r14
	WORD $0x8948; BYTE $0xf3 // movq	%rsi, %rbx
	JMP  LBB0_47

// No tail elements: tail count is 0.
LBB0_26:
	WORD $0xc931 // xorl	%ecx, %ecx
	JMP  LBB0_40

// Vectorized form of the tail loop for 8 or more remaining elements
// (r8d = remaining-1; r9 = mask rounding a count down to a multiple of 64).
LBB0_35:
	QUAD $0x0001ffffffc0b949; WORD $0x0000 // movabsq	$8589934528, %r9                # imm = 0x1FFFFFFC0
	LONG $0x3ff88341                       // cmpl	$63, %r8d
	JAE  LBB0_42
	WORD $0xc931                           // xorl	%ecx, %ecx
	WORD $0x3145; BYTE $0xd2               // xorl	%r10d, %r10d
	JMP  LBB0_37

// Unreachable since the n <= 0 guard was added. r10 = count as a 64-bit
// value, rcx = count rounded down to a multiple of 64 elements, rax = the
// same bound in bytes; zmm0..zmm3 are the counters.
LBB0_8:
	LONG $0x01c28349               // addq	$1, %r10
	WORD $0x894c; BYTE $0xd1       // movq	%r10, %rcx
	WORD $0x214c; BYTE $0xc1       // andq	%r8, %rcx
	QUAD $0x0000000095048d4a       // leaq	(,%r10,4), %rax
	LONG $0xff002548; WORD $0xffff // andq	$-256, %rax
	LONG $0xc0eff9c5               // vpxor	%xmm0, %xmm0, %xmm0
	WORD $0xdb31                   // xorl	%ebx, %ebx
	LONG $0xc9eff1c5               // vpxor	%xmm1, %xmm1, %xmm1
	LONG $0xd2efe9c5               // vpxor	%xmm2, %xmm2, %xmm2
	LONG $0xdbefe1c5               // vpxor	%xmm3, %xmm3, %xmm3

// Unreachable (see LBB0_8). 64 floats per pass, indexed by the byte offset in
// rbx, followed by a horizontal sum of the counters into eax.
LBB0_9:
	LONG $0x487cf162; WORD $0x2410; BYTE $0x1f // vmovups	(%rdi,%rbx), %zmm4
	QUAD $0x011f6c10487cf162                   // vmovups	64(%rdi,%rbx), %zmm5
	QUAD $0x021f7410487cf162                   // vmovups	128(%rdi,%rbx), %zmm6
	QUAD $0x031f7c10487cf162                   // vmovups	192(%rdi,%rbx), %zmm7
	QUAD $0x041e04c2485cf162                   // vcmpneqps	(%rsi,%rbx), %zmm4, %k0
	QUAD $0x011e4cc24854f162; BYTE $0x04       // vcmpneqps	64(%rsi,%rbx), %zmm5, %k1
	QUAD $0x021e54c2484cf162; BYTE $0x04       // vcmpneqps	128(%rsi,%rbx), %zmm6, %k2
	QUAD $0x031e5cc24844f162; BYTE $0x04       // vcmpneqps	192(%rsi,%rbx), %zmm7, %k3
	LONG $0x487ef262; WORD $0xe038             // vpmovm2d	%k0, %zmm4
	LONG $0x487df162; WORD $0xc4fa             // vpsubd	%zmm4, %zmm0, %zmm0
	LONG $0x487ef262; WORD $0xe138             // vpmovm2d	%k1, %zmm4
	LONG $0x4875f162; WORD $0xccfa             // vpsubd	%zmm4, %zmm1, %zmm1
	LONG $0x487ef262; WORD $0xe238             // vpmovm2d	%k2, %zmm4
	LONG $0x486df162; WORD $0xd4fa             // vpsubd	%zmm4, %zmm2, %zmm2
	LONG $0x487ef262; WORD $0xe338             // vpmovm2d	%k3, %zmm4
	LONG $0x4865f162; WORD $0xdcfa             // vpsubd	%zmm4, %zmm3, %zmm3
	LONG $0x00c38148; WORD $0x0001; BYTE $0x00 // addq	$256, %rbx                      # imm = 0x100
	WORD $0x3948; BYTE $0xd8                   // cmpq	%rbx, %rax
	JNE  LBB0_9
	LONG $0x4875f162; WORD $0xc0fe             // vpaddd	%zmm0, %zmm1, %zmm0
	LONG $0x486df162; WORD $0xc0fe             // vpaddd	%zmm0, %zmm2, %zmm0
	LONG $0x4865f162; WORD $0xc0fe             // vpaddd	%zmm0, %zmm3, %zmm0
	LONG $0x48fdf362; WORD $0xc13b; BYTE $0x01 // vextracti64x4	$1, %zmm0, %ymm1
	LONG $0x487df162; WORD $0xc1fe             // vpaddd	%zmm1, %zmm0, %zmm0
	LONG $0x397de3c4; WORD $0x01c1             // vextracti128	$1, %ymm0, %xmm1
	LONG $0xc1fef9c5                           // vpaddd	%xmm1, %xmm0, %xmm0
	LONG $0xc870f9c5; BYTE $0xee               // vpshufd	$238, %xmm0, %xmm1              # xmm1 = xmm0[2,3,2,3]
	LONG $0xc1fef9c5                           // vpaddd	%xmm1, %xmm0, %xmm0
	LONG $0xc870f9c5; BYTE $0x55               // vpshufd	$85, %xmm0, %xmm1               # xmm1 = xmm0[1,1,1,1]
	LONG $0xc1fef9c5                           // vpaddd	%xmm1, %xmm0, %xmm0
	LONG $0xc07ef9c5                           // vmovd	%xmm0, %eax
	WORD $0x3949; BYTE $0xca                   // cmpq	%rcx, %r10
	JE   LBB0_41
	LONG $0x38c2f641                           // testb	$56, %r10b
	JE   LBB0_12

// Unreachable (see LBB0_8). Sets up an 8-floats-per-pass loop: r8 = count
// rounded down to a multiple of 8, r9/rbx = b/a advanced past that many
// elements, xmm0 seeded with the count so far.
LBB0_5:
	WORD $0x8945; BYTE $0xda // movl	%r11d, %r10d
	WORD $0x0145; BYTE $0xca // addl	%r9d, %r10d
	LONG $0x01c28349         // addq	$1, %r10
	LONG $0x38c08349         // addq	$56, %r8
	WORD $0x214d; BYTE $0xd0 // andq	%r10, %r8
	WORD $0x2945; BYTE $0xc3 // subl	%r8d, %r11d
	LONG $0x860c8d4e         // leaq	(%rsi,%r8,4), %r9
	LONG $0x871c8d4a         // leaq	(%rdi,%r8,4), %rbx
	LONG $0xc06ef9c5         // vmovd	%eax, %xmm0

// Unreachable (see LBB0_8). 8 floats per pass indexed by rcx, then a
// horizontal sum into eax.
LBB0_6:
	LONG $0x0c10fcc5; BYTE $0x8e   // vmovups	(%rsi,%rcx,4), %ymm1
	LONG $0x0cc2f4c5; WORD $0x048f // vcmpneqps	(%rdi,%rcx,4), %ymm1, %ymm1
	LONG $0xc1fafdc5               // vpsubd	%ymm1, %ymm0, %ymm0
	LONG $0x08c18348               // addq	$8, %rcx
	WORD $0x3949; BYTE $0xc8       // cmpq	%rcx, %r8
	JNE  LBB0_6
	LONG $0x397de3c4; WORD $0x01c1 // vextracti128	$1, %ymm0, %xmm1
	LONG $0xc1fef9c5               // vpaddd	%xmm1, %xmm0, %xmm0
	LONG $0xc870f9c5; BYTE $0xee   // vpshufd	$238, %xmm0, %xmm1              # xmm1 = xmm0[2,3,2,3]
	LONG $0xc1fef9c5               // vpaddd	%xmm1, %xmm0, %xmm0
	LONG $0xc870f9c5; BYTE $0x55   // vpshufd	$85, %xmm0, %xmm1               # xmm1 = xmm0[1,1,1,1]
	LONG $0xc1fef9c5               // vpaddd	%xmm1, %xmm0, %xmm0
	LONG $0xc07ef9c5               // vmovd	%xmm0, %eax
	WORD $0x394d; BYTE $0xc2       // cmpq	%r8, %r10
	JNE  LBB0_13
	JMP  LBB0_41

// n was an exact multiple of 128: horizontally add the eight lanes of ymm1
// into eax and finish.
LBB0_30:
	LONG $0x0275e2c4; BYTE $0xc1   // vphaddd	%ymm1, %ymm1, %ymm0
	LONG $0x027de2c4; BYTE $0xc0   // vphaddd	%ymm0, %ymm0, %ymm0
	LONG $0x397de3c4; WORD $0x01c1 // vextracti128	$1, %ymm0, %xmm1
	LONG $0xc0fef1c5               // vpaddd	%xmm0, %xmm1, %xmm0
	LONG $0xc07ef9c5               // vmovd	%xmm0, %eax
	JMP  LBB0_41

// Tail, 64-wide form: rax = remaining count, r10 = that count rounded down to
// a multiple of 64, zmm4..zmm7 = counters (ymm0..ymm3 still hold the sums from
// the main loops).
LBB0_42:
	LONG $0x01408d49         // leaq	1(%r8), %rax
	WORD $0x8949; BYTE $0xc2 // movq	%rax, %r10
	WORD $0x214d; BYTE $0xca // andq	%r9, %r10
	LONG $0xe4efd9c5         // vpxor	%xmm4, %xmm4, %xmm4
	WORD $0xc931             // xorl	%ecx, %ecx
	LONG $0xedefd1c5         // vpxor	%xmm5, %xmm5, %xmm5
	LONG $0xf6efc9c5         // vpxor	%xmm6, %xmm6, %xmm6
	LONG $0xffefc1c5         // vpxor	%xmm7, %xmm7, %xmm7

// 64 floats per pass indexed by the element offset in rcx, then a horizontal
// sum of zmm4..zmm7 into ecx.
LBB0_43:
	LONG $0x487c7162; WORD $0x0410; BYTE $0x8f // vmovups	(%rdi,%rcx,4), %zmm8
	QUAD $0x018f4c10487c7162                   // vmovups	64(%rdi,%rcx,4), %zmm9
	QUAD $0x028f5410487c7162                   // vmovups	128(%rdi,%rcx,4), %zmm10
	QUAD $0x038f5c10487c7162                   // vmovups	192(%rdi,%rcx,4), %zmm11
	QUAD $0x048e04c2483cf162                   // vcmpneqps	(%rsi,%rcx,4), %zmm8, %k0
	QUAD $0x018e4cc24834f162; BYTE $0x04       // vcmpneqps	64(%rsi,%rcx,4), %zmm9, %k1
	QUAD $0x028e54c2482cf162; BYTE $0x04       // vcmpneqps	128(%rsi,%rcx,4), %zmm10, %k2
	QUAD $0x038e5cc24824f162; BYTE $0x04       // vcmpneqps	192(%rsi,%rcx,4), %zmm11, %k3
	LONG $0x487e7262; WORD $0xc038             // vpmovm2d	%k0, %zmm8
	LONG $0x485dd162; WORD $0xe0fa             // vpsubd	%zmm8, %zmm4, %zmm4
	LONG $0x487e7262; WORD $0xc138             // vpmovm2d	%k1, %zmm8
	LONG $0x4855d162; WORD $0xe8fa             // vpsubd	%zmm8, %zmm5, %zmm5
	LONG $0x487e7262; WORD $0xc238             // vpmovm2d	%k2, %zmm8
	LONG $0x484dd162; WORD $0xf0fa             // vpsubd	%zmm8, %zmm6, %zmm6
	LONG $0x487e7262; WORD $0xc338             // vpmovm2d	%k3, %zmm8
	LONG $0x4845d162; WORD $0xf8fa             // vpsubd	%zmm8, %zmm7, %zmm7
	LONG $0x40c18348                           // addq	$64, %rcx
	WORD $0x3949; BYTE $0xca                   // cmpq	%rcx, %r10
	JNE  LBB0_43
	LONG $0x4855f162; WORD $0xe4fe             // vpaddd	%zmm4, %zmm5, %zmm4
	LONG $0x484df162; WORD $0xe4fe             // vpaddd	%zmm4, %zmm6, %zmm4
	LONG $0x4845f162; WORD $0xe4fe             // vpaddd	%zmm4, %zmm7, %zmm4
	LONG $0x48fdf362; WORD $0xe53b; BYTE $0x01 // vextracti64x4	$1, %zmm4, %ymm5
	LONG $0x485df162; WORD $0xe5fe             // vpaddd	%zmm5, %zmm4, %zmm4
	LONG $0x397de3c4; WORD $0x01e5             // vextracti128	$1, %ymm4, %xmm5
	LONG $0xe5fed9c5                           // vpaddd	%xmm5, %xmm4, %xmm4
	LONG $0xec70f9c5; BYTE $0xee               // vpshufd	$238, %xmm4, %xmm5              # xmm5 = xmm4[2,3,2,3]
	LONG $0xe5fed9c5                           // vpaddd	%xmm5, %xmm4, %xmm4
	LONG $0xec70f9c5; BYTE $0x55               // vpshufd	$85, %xmm4, %xmm5               # xmm5 = xmm4[1,1,1,1]
	LONG $0xe5fed9c5                           // vpaddd	%xmm5, %xmm4, %xmm4
	LONG $0xe17ef9c5                           // vmovd	%xmm4, %ecx
	WORD $0x394c; BYTE $0xd0                   // cmpq	%r10, %rax
	JE   LBB0_40
	WORD $0x38a8                               // testb	$56, %al
	JE   LBB0_46

// Tail, 8-wide form setup: r9 = remaining count rounded down to a multiple of
// 8, r14/rbx = a/b advanced past that many elements, xmm4 seeded with the
// tail count so far.
LBB0_37:
	LONG $0x01c08349         // addq	$1, %r8
	LONG $0x38c18349         // addq	$56, %r9
	WORD $0x214d; BYTE $0xc1 // andq	%r8, %r9
	LONG $0x8f348d4e         // leaq	(%rdi,%r9,4), %r14
	LONG $0x8e1c8d4a         // leaq	(%rsi,%r9,4), %rbx
	WORD $0x2945; BYTE $0xcb // subl	%r9d, %r11d
	LONG $0xe16ef9c5         // vmovd	%ecx, %xmm4

// 8 floats per pass indexed by r10, then a horizontal sum into ecx.
LBB0_38:
	LONG $0x107ca1c4; WORD $0x962c             // vmovups	(%rsi,%r10,4), %ymm5
	LONG $0xc254a1c4; WORD $0x972c; BYTE $0x04 // vcmpneqps	(%rdi,%r10,4), %ymm5, %ymm5
	LONG $0xe5faddc5                           // vpsubd	%ymm5, %ymm4, %ymm4
	LONG $0x08c28349                           // addq	$8, %r10
	WORD $0x394d; BYTE $0xd1                   // cmpq	%r10, %r9
	JNE  LBB0_38
	LONG $0x397de3c4; WORD $0x01e5             // vextracti128	$1, %ymm4, %xmm5
	LONG $0xe5fed9c5                           // vpaddd	%xmm5, %xmm4, %xmm4
	LONG $0xec70f9c5; BYTE $0xee               // vpshufd	$238, %xmm4, %xmm5              # xmm5 = xmm4[2,3,2,3]
	LONG $0xe5fed9c5                           // vpaddd	%xmm5, %xmm4, %xmm4
	LONG $0xec70f9c5; BYTE $0x55               // vpshufd	$85, %xmm4, %xmm5               # xmm5 = xmm4[1,1,1,1]
	LONG $0xe5fed9c5                           // vpaddd	%xmm5, %xmm4, %xmm4
	LONG $0xe17ef9c5                           // vmovd	%xmm4, %ecx
	WORD $0x394d; BYTE $0xc8                   // cmpq	%r9, %r8
	JNE  LBB0_47
	JMP  LBB0_40

// Unreachable since the n <= 0 guard was added: advances rbx/r9 past the rcx
// elements already handled and falls into the scalar loop below.
LBB0_12:
	LONG $0x8f1c8d48         // leaq	(%rdi,%rcx,4), %rbx
	LONG $0x8e0c8d4c         // leaq	(%rsi,%rcx,4), %r9
	WORD $0x2941; BYTE $0xcb // subl	%ecx, %r11d

// Scalar loop for 1 <= n <= 7: ecx = element count, rsi = index.
LBB0_13:
	WORD $0x8944; BYTE $0xd9 // movl	%r11d, %ecx
	WORD $0xf631             // xorl	%esi, %esi

// Compare one float per iteration; the mask bit moved out of k0 (0 or 1) is
// added to eax. The body runs before the index test, so the count must be
// at least 1 on entry.
LBB0_14:
	LONG $0x0410fac5; BYTE $0xb3 // vmovss	(%rbx,%rsi,4), %xmm0            # xmm0 = mem[0],zero,zero,zero
	QUAD $0x04b104c2087ed162     // vcmpneqss	(%r9,%rsi,4), %xmm0, %k0
	LONG $0xf893f8c5             // kmovw	%k0, %edi
	WORD $0xf801                 // addl	%edi, %eax
	LONG $0x01c68348             // addq	$1, %rsi
	WORD $0xf139                 // cmpl	%esi, %ecx
	JNE  LBB0_14
	JMP  LBB0_41

// Tail: only scalar work is left after the 64-wide pass; advance rbx/r14
// past the r10 elements already handled.
LBB0_46:
	WORD $0x2945; BYTE $0xd3 // subl	%r10d, %r11d
	LONG $0x961c8d4a         // leaq	(%rsi,%r10,4), %rbx
	LONG $0x97348d4e         // leaq	(%rdi,%r10,4), %r14

// Scalar tail setup: esi = remaining count, rdi = index.
LBB0_47:
	WORD $0x8944; BYTE $0xde // movl	%r11d, %esi
	WORD $0xff31             // xorl	%edi, %edi

// One float per iteration; the 0/1 mask bit is added to the tail count ecx.
LBB0_48:
	LONG $0x107ac1c4; WORD $0xbe24 // vmovss	(%r14,%rdi,4), %xmm4            # xmm4 = mem[0],zero,zero,zero
	QUAD $0x04bb04c2085ef162       // vcmpneqss	(%rbx,%rdi,4), %xmm4, %k0
	LONG $0xc093f8c5               // kmovw	%k0, %eax
	WORD $0xc101                   // addl	%eax, %ecx
	LONG $0x01c78348               // addq	$1, %rdi
	WORD $0xfe39                   // cmpl	%edi, %esi
	JNE  LBB0_48

// Combine: add the ymm counters (ymm0..ymm3), reduce them to one 32-bit sum
// in eax, then add the tail count from ecx.
LBB0_40:
	LONG $0xc0fee5c5               // vpaddd	%ymm0, %ymm3, %ymm0
	LONG $0xc2fefdc5               // vpaddd	%ymm2, %ymm0, %ymm0
	LONG $0xc1fefdc5               // vpaddd	%ymm1, %ymm0, %ymm0
	LONG $0x027de2c4; BYTE $0xc0   // vphaddd	%ymm0, %ymm0, %ymm0
	LONG $0x027de2c4; BYTE $0xc0   // vphaddd	%ymm0, %ymm0, %ymm0
	LONG $0x397de3c4; WORD $0x01c1 // vextracti128	$1, %ymm0, %xmm1
	LONG $0xc0fef1c5               // vpaddd	%xmm0, %xmm1, %xmm0
	LONG $0xc07ef9c5               // vmovd	%xmm0, %eax
	WORD $0xc801                   // addl	%ecx, %eax

// Exit: convert the integer count in eax to float32, store it at (rdx),
// undo the prologue (rsp = rbp-16, then pop in reverse push order), clear the
// upper vector state and return.
LBB0_41:
	LONG $0xc02a82c5         // vcvtsi2ss	%eax, %xmm15, %xmm0
	LONG $0x0211fac5         // vmovss	%xmm0, (%rdx)
	LONG $0xf0658d48         // leaq	-16(%rbp), %rsp
	BYTE $0x5b               // popq	%rbx
	WORD $0x5e41             // popq	%r14
	BYTE $0x5d               // popq	%rbp
	WORD $0xf8c5; BYTE $0x77 // vzeroupper
	BYTE $0xc3               // retq
